* Load the merged NSS round 61/64 file with output paging switched off.
set more off
use "$dir\datasets\NSS dataset creation\nss_64_10\data_output\merged_all_NSS_NREGA_61_64.dta", clear


*** Sample restrictions: Phase 2 and Phase 3 districts, individuals in rural areas, 18-60, at most secondary education, men
* keep observations whose rank is not system-missing and that are not flagged nrega_phase_1
keep if rank!=.
keep if nrega_phase_1!=1

* sector 1 only, ages 18 to 60 inclusive (a missing age fails inrange and is dropped)
keep if sector==1 & inrange(age,18,60)

* Harmonise general_education onto one 10-level scale; the raw codes differ by round.
* Any code not listed below (or any round other than 61/64) leaves education missing.
gen education=.

* round 64: 1 -> 1, 2-5 -> 2, 6-8 -> 3-5, 10-14 -> 6-10
replace education=1 if round==64 & general_education==1
replace education=2 if round==64 & inlist(general_education,2,3,4,5)
replace education=general_education-3 if round==64 & inlist(general_education,6,7,8)
replace education=general_education-4 if round==64 & inlist(general_education,10,11,12,13,14)

* round 61: 1 -> 1, 2-4 -> 2, 5-8 -> 3-6, 10-13 -> 7-10
replace education=1 if round==61 & general_education==1
replace education=2 if round==61 & inlist(general_education,2,3,4)
replace education=general_education-2 if round==61 & inlist(general_education,5,6,7,8)
replace education=general_education-3 if round==61 & inlist(general_education,10,11,12,13)

* value labels for the harmonised scale
cap label drop education
label define education ///
    1 "not literate" ///
    2 "literate without formal schooling" ///
    3 "below primary" ///
    4 "primary" ///
    5 "upper primary/middle" ///
    6 "secondary" ///
    7 "higher secondary" ///
    8 "diploma/certificate course" ///
    9 "graduate" ///
    10 "postgraduate and above"

label values education education

* at most secondary education; a missing education compares greater than 6 and is dropped too
keep if education<=6

* sex code 1 only (men, per the sample description above)
keep if sex==1

* district FE
gen state_dist=state*100+district

**************** outcomes ***********

* first clean the work intensity variables
* intensity<d><a> holds the intensity for day d (1-7) of activity a (1-5).
* It starts at 0 and is set to 5 or 10 only when a source variable carries exactly
* that value; any other source value (including missing) leaves it at 0.
* Activities 1-4 have two source spellings, intensity_day_<tag><a> and
* intensity_<tag>_day<a> (presumably one per survey round -- confirm against the
* dataset creation files); the second spelling is applied last.
local day_tags 1st 2nd 3rd 4th 5th 6th 7th

forvalues a=1/4 {
    local d=0
    foreach tag of local day_tags {
        local ++d
        gen intensity`d'`a'=0
        foreach src in intensity_day_`tag'`a' intensity_`tag'_day`a' {
            replace intensity`d'`a'=5 if `src'==5
            replace intensity`d'`a'=10 if `src'==10
        }
    }
}

* activity 5 is read from the intensity_day_<tag>5 spelling only
local d=0
foreach tag of local day_tags {
    local ++d
    gen intensity`d'5=0
    replace intensity`d'5=5 if intensity_day_`tag'5==5
    replace intensity`d'5=10 if intensity_day_`tag'5==10
}

* private employment is considered to be casual labour in everything that is not family business and not public works
* For each activity a, the seven daily intensities are summed when the activity carries
* the relevant status code and set to 0 otherwise. Activities 1-4 are matched on either
* status<a> or curr_wkly_act_status<a>; activity 5 is matched on status5 only.
* The activity totals are divided by 10 (presumably converting intensity points into
* days worked -- confirm).
forvalues a=1/5 {
    local week intensity1`a' + intensity2`a' + intensity3`a' + intensity4`a' + intensity5`a' + intensity6`a' + intensity7`a'
    local hit status`a'==51
    if `a'<5 local hit `hit' | curr_wkly_act_status`a'==51
    gen priv_empl`a'=cond(`hit', `week', 0)
}

gen priv_empl=(priv_empl1+priv_empl2+priv_empl3+priv_empl4+priv_empl5)/10

* private-sector employment
gen priv_empl_ext=priv_empl>0


* public employment in NREGS or other projects
forvalues a=1/5 {
    local week intensity1`a' + intensity2`a' + intensity3`a' + intensity4`a' + intensity5`a' + intensity6`a' + intensity7`a'
    local hit status`a'==41
    if `a'<5 local hit `hit' | curr_wkly_act_status`a'==41 | curr_wkly_act_status`a'==42
    gen pub_empl`a'=cond(`hit', `week', 0)
}

gen pub_empl=(pub_empl1+pub_empl2+pub_empl3+pub_empl4+pub_empl5)/10

* public-sector employment
gen pub_empl_ext=pub_empl>0

* total casual employment: public plus private
gen empl=pub_empl+priv_empl

* working in the family business: self-employed (11) or helper in family business (unpaid family worker, 21)
forvalues a=1/5 {
    local week intensity1`a' + intensity2`a' + intensity3`a' + intensity4`a' + intensity5`a' + intensity6`a' + intensity7`a'
    local hit status`a'==11 | status`a'==21
    if `a'<5 local hit `hit' | curr_wkly_act_status`a'==11 | curr_wkly_act_status`a'==21
    gen fam_empl`a'=cond(`hit', `week', 0)
}

gen fam_empl=(fam_empl1+fam_empl2+fam_empl3+fam_empl4+fam_empl5)/10

* family employment
gen fam_empl_ext=fam_empl>0


* indicator for any private or family employment
gen privfam_empl_ext=(priv_empl_ext==1 | fam_empl_ext==1)

* for each job, take total wage and in-kind earnings; but only if job is a casual job
* Activities 1-4: earnings come from wage_salary_earn_total<a> in round 64 and from
* wage_total<a> in round 61, and only when that value is not system-missing.
* Activity 5: only wage_total5 in round 61 is used, matched on status5 alone.
* Anything that does not qualify contributes 0.
forvalues a=1/4 {
    local casual status`a'==41 | status`a'==51 | curr_wkly_act_status`a'==41 | curr_wkly_act_status`a'==42 | curr_wkly_act_status`a'==51
    gen casual_wage`a'=0
    replace casual_wage`a'=wage_salary_earn_total`a' if round==64 & wage_salary_earn_total`a'!=. & (`casual')
    replace casual_wage`a'=wage_total`a' if round==61 & wage_total`a'!=. & (`casual')
}

gen casual_wage5=0
replace casual_wage5=wage_total5 if round==61 & wage_total5!=. & (status5==41 | status5==51)

gen casual_wage=casual_wage1 + casual_wage2 + casual_wage3 + casual_wage4 + casual_wage5

* earnings per unit of casual employment; left missing when empl is 0
gen daily_wage=casual_wage/empl if empl>0


* private casual wage
forvalues a=1/4 {
    local private status`a'==51 | curr_wkly_act_status`a'==51
    gen casual_wage_priv`a'=0
    replace casual_wage_priv`a'=wage_salary_earn_total`a' if round==64 & wage_salary_earn_total`a'!=. & (`private')
    replace casual_wage_priv`a'=wage_total`a' if round==61 & wage_total`a'!=. & (`private')
}

gen casual_wage_priv5=0
replace casual_wage_priv5=wage_total5 if round==61 & wage_total5!=. & (status5==51)

gen casual_wage_priv=casual_wage_priv1 + casual_wage_priv2 + casual_wage_priv3 + casual_wage_priv4 + casual_wage_priv5

gen daily_wage_priv=casual_wage_priv/priv_empl if priv_empl>0


* public casual wage
forvalues a=1/4 {
    local public status`a'==41 | curr_wkly_act_status`a'==41 | curr_wkly_act_status`a'==42
    gen casual_wage_pub`a'=0
    replace casual_wage_pub`a'=wage_salary_earn_total`a' if round==64 & wage_salary_earn_total`a'!=. & (`public')
    replace casual_wage_pub`a'=wage_total`a' if round==61 & wage_total`a'!=. & (`public')
}

gen casual_wage_pub5=0
replace casual_wage_pub5=wage_total5 if round==61 & wage_total5!=. & (status5==41)

gen casual_wage_pub=casual_wage_pub1 + casual_wage_pub2 + casual_wage_pub3 + casual_wage_pub4 + casual_wage_pub5

gen daily_wage_pub=casual_wage_pub/pub_empl if pub_empl>0


* natural logs of the daily wages (missing where the wage is zero or missing)
gen log_daily_wage=ln(daily_wage)
gen log_daily_wage_priv=ln(daily_wage_priv)


* weighted count of sample individuals per district and round (sum of multiplier)
bysort state_code dist_code round: egen total_pop=total(multiplier)

* exclude state codes 3, 10, 19, 20 and 24
drop if inlist(state_code,3,10,19,20,24)

* Bin land_possessed into size-class codes (code 9 is not used).
* Values above 8000 that are not system-missing get code 12 first; then each upper
* bound is applied from largest to smallest, so an observation ends up with the code
* of the smallest bound it does not exceed. A missing land_possessed never satisfies
* a <= comparison and is left without a code.
gen land_possessed_code_61=.
replace land_possessed_code_61=12 if land_possessed>8000 & land_possessed!=.

local bounds 8000 6000 4000 3000 2000 1000 400 200 10 5
local codes 11 10 8 7 6 5 4 3 2 1
forvalues k=1/10 {
    local ub : word `k' of `bounds'
    local cd : word `k' of `codes'
    replace land_possessed_code_61=`cd' if land_possessed<=`ub'
}

* prefer the reported land code and fall back on the constructed one when it is missing
gen land_vari=land_possessed_code
replace land_vari=land_possessed_code_61 if land_vari==.

* an individual counts as a small farmer (weighted by multiplier) when the land code is at most 6
gen small_farmer=cond(land_vari<=6, multiplier, 0)

* district-round share of small farmers in the weighted sample
bysort state_code dist_code round: egen p_small_farmert=total(small_farmer)
gen p_small_farmer=p_small_farmert/total_pop


*** collapse information to the district level ***
* weighted means by district, round and sub-round
collapse(mean) pub_empl_ext privfam_empl_ext priv_empl_ext fam_empl_ext nrega_pred_2 nrega_phase_2 state_rank_norm_2 state_dist log_daily_wage log_daily_wage_priv daily_wage daily_wage_priv daily_wage_pub p_small_farmer [fweight=multiplier], by(state_code dist_code round sub_round)

* one row per district and sub-round; outcome variables get the round (61/64) as suffix
reshape wide pub_empl_ext privfam_empl_ext priv_empl_ext fam_empl_ext log_daily_wage log_daily_wage_priv daily_wage daily_wage_priv daily_wage_pub p_small_farmer, i(state_code dist_code sub_round) j(round)

* NOTE: the state dummies, polynomial/interaction terms and star-state variables
* created here are not retained by the keep statements further down.
tab state_code, gen(state1)

gen state_rank_norm_sq_2=state_rank_norm_2*state_rank_norm_2
gen int_eff_2=nrega_pred_2*state_rank_norm_2
gen int_sq_2=nrega_pred_2*state_rank_norm_sq_2

* star states (according to Khera 2011)
gen star_state=0
replace star_state=1 if (state_code==8 | state_code==22 | state_code==23 | state_code==28 | state_code==33)

gen nrega_star=nrega_pred_2*star_state

** bigger than median proportion of small farmers
egen med_land=median(p_small_farmer61)
* Stata orders missing values above every number, so an unguarded
* "p_small_farmer61>=med_land" is true for cells without round-61 data and would
* code them as above-median. Such cells are left missing instead.
gen small_farmers=(p_small_farmer61>=med_land) if !missing(p_small_farmer61)

keep state_code dist_code sub_round small_farmers
sort state_code dist_code sub_round
order state_code dist_code sub_round

* number of sub-rounds in which the district is at or above the median
* (egen total() treats a missing small_farmers as 0)
bys state_code dist_code: egen high_var_num=total(small_farmers)
keep state_code dist_code high_var_num small_farmers
duplicates drop
save "$dir\datasets\annual_small_farmers_baseline.dta", replace

